In this notebook, we estimate LGM monthly mean precipitation and minimum and maximum precipitation in the western Mediterranean by statistically a downscaling General Circulation Model (GCM) paleoclimate simulation. We first calibrate a Genralized Additive Model (GAM) using observed WorldClim climatologies, DEM-derived topographic variables, and simulations of present day climates using NCAR’s CESM/CCSM4 model. Then we apply this model to CCSM4 simulations of the LGM PMIP3 experiments.

##Preparation ###Setup

First load the necessary R packages.

library(raster)     # functions for managing and processing raster data
library(mgcv)       # functions to fit and analyze GAMs
library(dismo)      # functions to sample points weighted by latitude
library(magrittr)   # piping functions for code readability
library(rasterVis)  # functions for visualizing raster maps
library(ggplot2)    # functions for plotting
library(reshape2)

Observed Climate

Import the observed present-day climatologies which we’ll use to calibrate the model. We use WorldClim data at 5min resolution here, which can be easily downloaded using the getData() function in the raster package.

get.wc <- function(var){
     raster::getData('worldclim', var=var, res = 5, download = T) %>% 
     crop(extent(-20, 40, 30, 58)) %>%     
     set_names(month.name)
}

obs.prc <- get.wc('prec')
obs.tmx <- get.wc('tmax') %>% divide_by(10) # raw data in degrees Celsius * 10
obs.tmn <- get.wc('tmin') %>% divide_by(10) 

GCM Predictors

import.gcm <- function(dir, var, sim = 'lgm', level = 1){
     gcm.in <- brick(dir, var = var, level = level) 
     
     if(sim == 'control'){gcm.in <- gcm.in[[1200:1799]]}
     if(sim == 'lgm'){gcm.in <- gcm.in[[4213:4812]]}
     
     gcm.in <- stackApply(gcm.in, indices = 1:12, fun = mean)
     extent(gcm.in) <- extent(0, 360, -90, 90)
     
     gcm.in %>%    
     rotate %>%
     projectRaster(obs.prc) %>%
     mask(obs.prc)
}

control.prc <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.PRECT.185001-200512.nc', 
                            var = "PRECT", 
                            sim = 'control') %>% multiply_by(2629743830)
## Loading required namespace: ncdf4
control.tmx <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.TREFMXAV.185001-200512.nc', 
                            var = "TREFMXAV", 
                            sim = 'control') %>% subtract(273.15)

control.tmn <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.TREFMNAV.185001-200512.nc', 
                            var = "TREFMNAV", 
                            sim = 'control') %>% subtract(273.15)

control.psl <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.PSL.185001-200512.nc', 
                            var = "PSL", 
                            sim = 'control') %>% divide_by(1000)

control.q <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.QREFHT.185001-200512.nc', 
                           var = "QREFHT", 
                           sim = 'control')

control.u <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.U.185001-200512.nc', 
                          var = "U", 
                          sim = 'control',
                          level = 26)

control.v <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.V.185001-200512.nc', 
                          var = "V", 
                          sim = 'control',
                          level = 26)

control.z <- import.gcm('b40.20th.track1.1deg.005.cam2.h0.Z3.185001-200512.nc', 
                          var = "Z3", 
                          sim = 'control',
                          level = 23) # ~850 geopotential

Topographic Predictors

Generate some topographic predictors from a DEM.

Start with importing a DEM, then calculate aspect and slope maps using the terrain function.

elev <- raster('alt_5m_bil/alt.bil') %>% projectRaster(obs.prc)
aspect <- elev %>% terrain(opt='aspect', unit = 'degrees')
slope <- elev %>% terrain(opt = 'slope', unit = 'degrees')

Calculate the euclidean distance to the ocean, accounting for latitude effects.

dco <- raster('alt_5m_bil/alt.bil') %>% # import WorldClim 5m dem
    crop(extent(-20, 45, 30, 65)) %>%  # start with a wider region to get accurate distances
    reclassify(c(-Inf, Inf, NA, NA, NA, 1)) %>% # reverse NA and non-NA cells
    distance(doEdge = T) %>% # calculate the distances
    projectRaster(elev) %>%
    mask(elev) %>% # mask out ocean cells  
    divide_by(1000) # convert to km

streamplot(stack(control.u[[1]], control.v[[1]]), isField = 'dXY')

streamplot(stack(control.u[[7]], control.v[[7]]), isField = 'dXY')

dir <- (270 - overlay(control.v,  control.u, fun = atan2) * (180 / pi)) %% 360
vel <- sqrt(control.u ^ 2 + control.v ^ 2)

delta <- abs(dir - aspect) #distance between wind angle and slope orientation
values(delta) <- ifelse(values(delta) > 180, 360 - values(delta), values(delta))
vg <- sin(slope * 2 * pi / 180) * cos(delta * pi / 180) * vel * .5

We can expect the model errors to vary by month and by location (i.e. spatially and temporally autocorrelated residuals). Let’s include these variables in the model fitting.

month <- obs.prc %>% setValues(c(as.factor(rep(1:12, each = 241920)))) # each should = ncell(vg)

lat <- elev %>%
     coordinates %>%
     extract( ,2) %>% 
     setValues(elev, .)

lon <- elev %>%
     coordinates %>%
     extract( ,1) %>% 
     setValues(elev, .)

Put everything together.

cal.vars <- sapply(1:12, function(x){ 
  brick(obs.tmx[[x]], obs.tmn[[x]], obs.prc[[x]], control.tmx[[x]], 
    control.tmn[[x]], control.q[[x]], control.prc[[x]], control.psl[[x]], 
    control.z[[x]], elev, dco, vg[[x]], month[[x]], lat, lon) %>%
  setNames(c('obs.tmx', 'obs.tmn', 'obs.prc','TREFMXAV', 'TREFMNAV', 'Q', 'PRC', 
    'PSL', 'Z3', 'elev','dco', 'vg', 'month', 'lat','lon'))
})

rm(control.tmx, control.tmn, control.q,control.prc,control.psl,control.z,control.v,control.u,vg,dir,vel,delta)

Sample the variables at random points, weighting for area distortions due to latitude

cal.data <- lapply(cal.vars, function(x) (raster::extract(x, randomPoints(elev, 20000)) %>% data.frame)) %>% 
  do.call(rbind, .)

Fit the model

let’s start with temperature

fit.tmn <- gam(obs.tmn ~ s(TREFMNAV, bs = 'cr') +
                         s(Z3, bs = 'cr') +
                         s(elev, bs = 'cr') +
                         s(dco, bs = 'cr') +
                         s(month, bs = 're'), 
               method = 'REML', data = cal.data)


fit.tmn
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## obs.tmn ~ s(TREFMNAV, bs = "cr") + s(Z3, bs = "cr") + s(elev, 
##     bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
## 
## Estimated degrees of freedom:
## 8.97 8.94 7.81 8.50 1.00  total = 36.22 
## 
## REML score: 602412.5
summary(fit.tmn)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## obs.tmn ~ s(TREFMNAV, bs = "cr") + s(Z3, bs = "cr") + s(elev, 
##     bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 10.55892    0.01581   667.7   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df       F p-value    
## s(TREFMNAV) 8.972  9.000 87931.6  <2e-16 ***
## s(Z3)       8.945  8.998  5995.8  <2e-16 ***
## s(elev)     7.806  8.491  6897.5  <2e-16 ***
## s(dco)      8.499  8.898   370.1  <2e-16 ***
## s(month)    1.000  1.000 69044.7  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.834   Deviance explained = 83.4%
## -REML = 6.0241e+05  Scale est. = 9.7257    n = 235613
gam.check(fit.tmn)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 10 iterations.
## Gradient range [-0.07719612,0.07719018]
## (score 602412.5 & scale 9.725685).
## Hessian positive definite, eigenvalue range [0.577168,117804.1].
## Model rank =  38 / 38 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                k'   edf k-index p-value
## s(TREFMNAV) 9.000 8.972   1.016    0.90
## s(Z3)       9.000 8.945   0.997    0.42
## s(elev)     9.000 7.806   1.006    0.66
## s(dco)      9.000 8.499   1.001    0.58
## s(month)    1.000 1.000   0.281    0.00
plot(fit.tmn, shade=T, seWithMean = T, pages = 1)

fit.tmx <- gam(obs.tmx ~ s(TREFMXAV, bs = 'cr') +
                         s(Z3, bs = 'cr') +
                         s(elev, bs = 'cr') +
                         s(dco, bs = 'cr') +
                         s(month, bs = 're'),
                method = 'REML', data = cal.data)

fit.tmx
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## obs.tmx ~ s(TREFMXAV, bs = "cr") + s(Z3, bs = "cr") + s(elev, 
##     bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
## 
## Estimated degrees of freedom:
## 8.93 8.95 8.73 7.85 1.00  total = 36.47 
## 
## REML score: 627857.2
summary(fit.tmx)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## obs.tmx ~ s(TREFMXAV, bs = "cr") + s(Z3, bs = "cr") + s(elev, 
##     bs = "cr") + s(dco, bs = "cr") + s(month, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 22.63470    0.01736    1304   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##               edf Ref.df      F p-value    
## s(TREFMXAV) 8.932  8.998 138320  <2e-16 ***
## s(Z3)       8.954  8.999   5021  <2e-16 ***
## s(elev)     8.734  8.967   5004  <2e-16 ***
## s(dco)      7.849  8.546    187  <2e-16 ***
## s(month)    1.000  1.000 144117  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =   0.88   Deviance explained =   88%
## -REML = 6.2786e+05  Scale est. = 12.07     n = 235613
gam.check(fit.tmn)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 10 iterations.
## Gradient range [-0.07719612,0.07719018]
## (score 602412.5 & scale 9.725685).
## Hessian positive definite, eigenvalue range [0.577168,117804.1].
## Model rank =  38 / 38 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##                k'   edf k-index p-value
## s(TREFMNAV) 9.000 8.972   1.004    0.62
## s(Z3)       9.000 8.945   1.000    0.52
## s(elev)     9.000 7.806   0.997    0.50
## s(dco)      9.000 8.499   0.985    0.16
## s(month)    1.000 1.000   0.286    0.00
plot(fit.tmx, shade = T, seWithMean = T, pages = 1)

fit.prc.occur <- gam(factor(obs.prc >= 1) ~ s(PRC),
                    family = binomial, method = 'REML', data = cal.data)

summary(fit.prc.occur)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## factor(obs.prc >= 1) ~ s(PRC)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)    639.3      693.4   0.922    0.357
## 
## Approximate significance of smooth terms:
##          edf Ref.df Chi.sq p-value    
## s(PRC) 6.427  6.748   3204  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.157   Deviance explained = 38.1%
## -REML =  20146  Scale est. = 1         n = 235613
gam.check(fit.prc.occur)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 18 iterations.
## Gradient range [0.06175008,0.06175008]
## (score 20145.75 & scale 1).
## Hessian positive definite, eigenvalue range [4.338675,4.338675].
## Model rank =  10 / 10 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##           k'   edf k-index p-value
## s(PRC) 9.000 6.427   0.947       0
plot(fit.prc.occur, seWithMean = T, shade = T, pages = 1)

fit.prc <- bam(obs.prc ~ s(PRC, bs = 'cr') +
                         s(PSL, bs = 'cr') +
                         s(Q, bs = 'cr') + 
                         s(vg, bs = 'cr') +
                         s(Z3, bs = 'cr') +
                         s(elev, bs = 'cr') +
                         s(dco, bs = 'cr') +
                         s(month, bs = 're'),
               family = Gamma(link = 'log'), method = 'REML', data = cal.data[cal.data$obs.prc>=1,])

fit.prc
## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## obs.prc ~ s(PRC, bs = "cr") + s(PSL, bs = "cr") + s(Q, bs = "cr") + 
##     s(vg, bs = "cr") + s(Z3, bs = "cr") + s(elev, bs = "cr") + 
##     s(dco, bs = "cr") + s(month, bs = "re")
## 
## Estimated degrees of freedom:
## 8.99 8.98 8.67 8.58 8.95 8.32 8.37 
## 1.00  total = 62.85 
## 
## REML score: 126205.7
summary(fit.prc)
## 
## Family: Gamma 
## Link function: log 
## 
## Formula:
## obs.prc ~ s(PRC, bs = "cr") + s(PSL, bs = "cr") + s(Q, bs = "cr") + 
##     s(vg, bs = "cr") + s(Z3, bs = "cr") + s(elev, bs = "cr") + 
##     s(dco, bs = "cr") + s(month, bs = "re")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.523807   0.002563    1375   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##             edf Ref.df        F p-value    
## s(PRC)   8.9948  9.000 49653.63  <2e-16 ***
## s(PSL)   8.9754  9.000  2287.80  <2e-16 ***
## s(Q)     8.6718  8.957   636.45  <2e-16 ***
## s(vg)    8.5785  8.882    48.02  <2e-16 ***
## s(Z3)    8.9454  8.998   894.90  <2e-16 ***
## s(elev)  8.3169  8.798  1060.49  <2e-16 ***
## s(dco)   8.3674  8.830  1086.49  <2e-16 ***
## s(month) 0.9999  1.000  6687.05  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.633   Deviance explained = 63.6%
## -REML = 1.2621e+05  Scale est. = 0.18603   n = 217970
gam.check(fit.prc)

## 
## Method: REML   Optimizer: outer newton
## full convergence after 8 iterations.
## Gradient range [-6.225455e-06,4.819515e-06]
## (score 126205.7 & scale 0.1860329).
## Hessian positive definite, eigenvalue range [0.4998529,108981].
## Model rank =  65 / 65 
## 
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
## 
##             k'   edf k-index p-value
## s(PRC)   9.000 8.995   0.975    0.38
## s(PSL)   9.000 8.975   0.973    0.32
## s(Q)     9.000 8.672   0.956    0.04
## s(vg)    9.000 8.578   0.993    0.78
## s(Z3)    9.000 8.945   0.950    0.02
## s(elev)  9.000 8.317   0.965    0.12
## s(dco)   9.000 8.367   0.955    0.04
## s(month) 1.000 1.000   0.928    0.00
plot(fit.prc, shade = T, seWithMean = T, pages = 1)

Paleo

lgm.tmx <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.TREFMXAV.149901-189912.nc', 
                         var = "TREFMXAV") %>% subtract(273.15)

lgm.tmn <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.TREFMNAV.149901-189912.nc', 
                         var = "TREFMNAV") %>% subtract(273.15)

lgm.q <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.QREFHT.149901-189912.nc', 
                           var = "QREFHT")

lgm.psl <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.PSL.149901-189912.nc', 
                         var = "PSL") %>% divide_by(1000)

lgm.prcc <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.PRECC.149901-189912.nc', 
                         var = "PRECC") %>% multiply_by(2629743830)

lgm.prcl <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.PRECL.149901-189912.nc', 
                         var = "PRECL") %>% multiply_by(2629743830)

lgm.prc <- lgm.prcl + lgm.prcc

lgm.u <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.U.149901-189912.nc', 
                          var = "U",
                          level = 26)

lgm.v <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.V.149901-189912.nc', 
                          var = "V",
                          level = 26)

lgm.z <- import.gcm('b40.lgm21ka.1deg.003.cam2.h0.Z3.149901-189912.nc', 
                         var = "Z3", level = 23)



lgm.dir <- (270 - overlay(lgm.v,  lgm.u, fun = atan2) * (180 / pi)) %% 360
lgm.vel <- sqrt(lgm.u ^ 2 + lgm.v ^ 2)
lgm.delta <- abs(lgm.dir - aspect)
values(lgm.delta) <- ifelse(values(lgm.delta) > 180, 360 - values(lgm.delta), values(lgm.delta))
lgm.vg <- sin(slope * 2 * pi / 180) * cos(lgm.delta * pi / 180) * lgm.vel * .5

pred.vars <- sapply(1:12, function(x){ 
  brick(lgm.tmx[[x]], lgm.tmn[[x]], lgm.prc[[x]], lgm.psl[[x]], lgm.q[[x]], lgm.z[[x]], elev, dco, lgm.vg[[x]], month[[x]]) %>% 
    crop(extent(-10, 20, 36, 46)) %>%
    setNames(c('TREFMXAV', 'TREFMNAV', 'PRC', 'PSL', 'Q','Z3', 'elev','dco', 'vg','month'))
})

rm(lgm.tmn,lgm.tmx,lgm.z,lgm.q,lgm.v,lgm.u,lgm.prcc,lgm.prcl,lgm.vg,lgm.prc,lgm.dir,lgm.psl,lgm.vel,lgm.delta)
tmn.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.tmn, type = 'response')}) %>% brick
tmx.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.tmx, type = 'response')}) %>% brick
prc.occur.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.prc.occur, type = 'response')}) %>% brick
prc.recon <- lapply(1:12, function(x){predict(pred.vars[[x]], fit.prc, type = 'response')}) %>% brick
prc.recon <- mask(prc.recon, prc.occur.recon>.7, maskvalue=0, update.value=0)